********************************************************************************
* matching_firm_w_roa.do
* Purpose: ROBUSTNESS CHECK - Firm PSM adding Return on Assets (ROA) as an
*          additional matching variable.
*
* Difference from baseline (matching_firm.do):
*   - ROA is added as a quadratic term in the pscore regression.
*     (As written, the code does not create a ROA bin, so ROA does not enter
*     the exact-match cells; it affects matching only through the pscore.)
*   - This tests whether results are sensitive to pre-event profitability
*     differences between treated and control firms.
*
* Output: $data/firm_matched_w_roa.dta
*         $data/worker_matched_w_roa.dta (constructed downstream in matching_worker_w_roa.do)
********************************************************************************
global start_year 	= 2004
global end_year 	= 2016

**# matching
* For each pre-event year y: build the eligible firm sample, form exact-match
* cells, estimate a linear pscore that includes ROA, run 1:1 nearest-neighbour
* matching without replacement, and save the list of matched firms for that year.
forvalues y = $start_year/$end_year {
	
	* -use- takes the -clear- option (not -replace-) to discard the data in memory
	use $data/firm_`y', clear

	** drop firms that have less than 10 employees in year y **
	keep if PD7_AvgEmp_NonZero >= 10

	** drop firms with missing values for the match vars **
	drop if mi(naics) | mi(total_revenue) | mi(age) | mi(avg_wage)
	
	** Define treated for firms that go through an M&A event the following year **
	gen treated		= (DEAL_YEAR == `y' + 1)
	
	** drop other M&A firms that are treated in other years ** 
	drop if MnA_firm == 1 & treated == 0
	gsort -treated
	
	* create age, average payrolls, and revenue bins:
	* 10 quantile bins for avg_wage, 15 for total_revenue and age
	foreach var in avg_wage total_revenue age {
	
		* The loop macro has to be compared as a quoted string. Unquoted, the
		* test compares the variable with itself, is always true, and every
		* variable silently ends up with 10 bins.
		* NOTE(review): if matching_firm.do has the same unquoted comparison,
		* fix it there too so both specifications differ only in ROA.
		if "`var'" == "avg_wage" {
			local nbins = 10
		}
		else {
			local nbins = 15
		}
		gquantiles 	bin_`var' = `var', xtile n(`nbins')
	}
	
	* create interactions between bins
	* firms must be matched within the same cells
	gegen cell = group(naics2 OPAddressProvince bin_*)
	drop if cell == .
	
	* NOTE(review): roa is created after the cells are built and is not binned,
	* so it affects matching only through the pscore regression below. Confirm
	* whether it was also meant to be a bin variable.
	gen 	roa			= (total_revenue - total_expense)/total_assets
		
	* estimate pscore using the eligible sample (linear model, quadratic in each covariate)
	reg treated c.total_revenue##c.total_revenue c.age##c.age c.avg_wage##c.avg_wage c.roa##c.roa
	
	* Store the fitted value as double: a float keeps only about 7 significant
	* digits, so after 10*cell is added below the within-cell differences in the
	* fitted pscore could be rounded away.
	predict double pscore

	* Shift the pscore by 10 per cell. Cells are then 10 apart, far more than the
	* caliper of 1 used below, so nearest-neighbour matches stay inside a cell
	* (this assumes the fitted values themselves are small relative to 10).
	replace pscore = pscore + 10.0*cell
	
	* placeholder outcome handed to psmatch2; it is not kept in the saved list
	gen outcome = rnormal()
	
	*** Fix the sorting of data so the matching is replicable
	gsort entid_syn
		
	* matching
	psmatch2 treated, outcome(outcome) pscore(pscore) caliper(1) n(1) noreplacement
	
	* Give both members of a pair the same id: the control's psmatch2 _id.
	* For a treated firm, _n1 is the _id of its matched control (see psmatch2 help).
	gen		 id = _id if treated==0 & _weight==1
	replace	 id = _n1 if treated==1 & _weight==1
	
	gegen pairid = group(id)
	gsort pairid
	
	gen year_prior = `y'
	
	* unmatched firms have no id and are dropped
	drop if mi(id)
	keep entid_syn treated pairid year_prior

	save $data/firm_matched_w_roa_list_`y'.dta,  replace
	
}

* Stack the per-year matched lists into a single list of matched firms.
* pairid is assigned within each year, so a pair is identified by
* pairid together with year_prior.
drop _all
foreach yr of numlist $start_year/$end_year {
	append using $data/firm_matched_w_roa_list_`yr', force keep(entid_syn pairid treated year_prior)
}
save $data/firm_matched_w_roa_list, replace

* Pull the records of every matched firm from each yearly firm file, then
* stack the years into one intermediate panel.
* The scratch files carry the w_roa tag like every other file this script
* writes: they are overwritten and then deleted, so an untagged name could
* clobber a same-named file belonging to another specification.
forvalues y = 2001/2017{

	use $data/firm_`y', clear

	* keep matched treated and control firms
	* (1:m because a firm can appear in the list more than once; keep(3)
	*  retains only firms found in both the yearly file and the list)
	merge 1:m 	entid_syn using $data/firm_matched_w_roa_list, keep(3) nogen

	save $data/firm_matched_w_roa_`y', replace

}

* stack the yearly pieces and delete each scratch file once it has been appended
drop _all
forvalues y = 2001/2017{
	append using $data/firm_matched_w_roa_`y', force
	rm $data/firm_matched_w_roa_`y'.dta
}

save $data/firm_matched_w_roa_intermid, replace

gsort pairid year_prior -treated year
**# Matched Firm Panel
* Build firm and firm-by-match identifiers, keep the deal attributes only on
* the pre-event row, and rebuild the pooled 2-digit sector code.
gegen 	firm_id     = group(entid_syn)
gegen 	id 			= group(firm_id year_prior)

* deal attributes are blanked on every row where year differs from year_prior
foreach dealvar in DEAL Acquirer DEAL_YEAR {
	replace `dealvar' = . if year != year_prior
}

** sector: fill missing naics with the firm's modal code (lowest mode on ties),
** then take the leading digits and pool related 2-digit sectors
drop naics2
gegen	naics_mode	= mode(naics), by(entid_syn) minmode
replace naics		= naics_mode	if mi(naics)
gen 	naics2 		= int(naics/100)
recode 	naics2 (32 33 = 31) (45 = 44) (49 = 48) (56 61 62 = 54)

**# Time Variables
* Event time relative to the deal year (year_prior + 1); the tails are pooled
* into -6 and +6.
gen 	t = year - (year_prior + 1)
replace t = 6 	if t >  5 	& ~mi(t)
replace t = -6 	if t < -5 	& ~mi(t)

* one indicator per distinct value of t: ds_1, ds_2, ... in ascending order of t
tab t, gen(ds_)

* Label each indicator with the event time it represents and zero out the
* indicator for t = -1 (presumably the omitted reference period; with all 13
* event times present this is ds_6). Walking over the actual levels of t keeps
* labels and the zeroed indicator correct even if some event time is absent.
levelsof t, local(ts)
local i = 0
foreach tv of local ts {
	local ++i
	label variable ds_`i' "`tv'"
	if `tv' == -1 {
		replace ds_`i' = 0
	}
}

* sector of the treated firm in the pre-event year, spread to both pair members
gen 	naics2_event	= cond(treated == 1 & year == year_prior, naics2, .)
gegen 	matched_sector 	= firstnm(naics2_event),		by(pairid year_prior)

* firm age: rebuild from BirthDate as observed in the pre-event year,
* so that age is filled in for every year of the panel
drop 	dateinc dateinc_year age
gen 	temp_dateinc 	= dofc(BirthDate) if year == year_prior
gegen	dateinc 		= firstnm(temp_dateinc), by(id)
gen 	dateinc_year 	= yofd(dateinc)
gen 	age 			= year - dateinc_year
* polynomial terms in age
forvalues p = 2/4 {
	gen 	age`p' 			= age^`p'
}

* keep acquirer status and deal type only on the treated firm's pre-event row
foreach dealvar in Acquirer merger {
	replace `dealvar' 	= . if treated != 1 | year != year_prior
}

* pair-level variables: copy the treated firm's values to the whole pair
gsort pairid year_prior -treated year
gegen matched_acq 		= firstnm(Acquirer), 			by(pairid year_prior)
gegen matched_deal_type = firstnm(merger),				by(pairid year_prior)

** drop pairs whose treated firm is in sector 52 or 55 (financial sectors) **
drop if inlist(matched_sector, 52, 55)

compress
save $data/firm_matched_w_roa, replace